#Introduction This document is an analysis of the classification and origin of 2410 brands of beer produced by 558 breweries across the continential United States.
Of particular interest was the high concentration of unique beer brand production in breweries on the west coast, the significantly above average number of IPA style beers, and majority of beers that cluster around low alcohol content and bitterness.
Colorado has 39 breweries reported in the data, followed by California with 37. Additional information on the destinations of beer produced in these states and further research could determine if this indicates a reigonally well established demand for local beer variety, corporate interests in the area, or an oversaturated production environment.
Of the 2410 brands represented in the data, 17.6% of them are IPA style. The next 10% are APA, and 5.5% American Amber/Pale Ale. The top 3 most populous styles of beer make up more than 1/3 of all the reported beer brands. Further research could determine if this stylistic majority represents a marketplace glut or genuine demand for beers in these styles.
(percentage from graph) of the beer brands in the data have an alcohol content of 5% or less, with a bitterness rating of 25, with only (percentage) of brands exceeding 10% ABV and 100 IBU. This indicates that most breweries believe that beers around this level of bitterness and alcohol have the highest rate of consumption. Further research could determine if the alcohol content and bitterness level of a beer is a factor in popularity, or if production cost or legal stipulations make production more attractive to breweries.
#'x' is the column of a data.frame that holds 2 digit state codes
stateFromLower <-function(x) {
#read 52 state codes into local variable [includes DC (Washington D.C. and PR (Puerto Rico)]
st.codes<-data.frame(
state=as.factor(c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
"HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME",
"MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM",
"NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN",
"TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")),
full=as.factor(c("alaska","alabama","arkansas","arizona","california","colorado",
"connecticut","district of columbia","delaware","florida","georgia",
"hawaii","iowa","idaho","illinois","indiana","kansas","kentucky",
"louisiana","massachusetts","maryland","maine","michigan","minnesota",
"missouri","mississippi","montana","north carolina","north dakota",
"nebraska","new hampshire","new jersey","new mexico","nevada",
"new york","ohio","oklahoma","oregon","pennsylvania","puerto rico",
"rhode island","south carolina","south dakota","tennessee","texas",
"utah","virginia","vermont","washington","wisconsin",
"west virginia","wyoming"))
)
#create an nx1 data.frame of state codes from source column
st.x<-data.frame(state=x)
#match source codes with codes from 'st.codes' local variable and use to return the full state name
refac.x<-st.codes$full[match(st.x$state,st.codes$state)]
#return the full state names in the same order in which they appeared in the original source
return(refac.x)
}
#1. How many breweries are present in each state?
beer_path<-file.path(getwd(), "Raw Data", "Beers.csv")
brewery_path<-file.path(getwd(), "Raw Data", "Breweries.csv")
beer_df=read.csv(beer_path)
brewery_df=read.csv(brewery_path)
# returns string w/o leading whitespace
trim <- function (x) sub("^\\s+", "", x)
brewery_df$State <- trim(brewery_df$State)
# covert state to full state name
brewery_df$State_full<-stateFromLower(brewery_df$State)
count_by_state <- data.frame(table(brewery_df$State_full))
# using maps to get coordinate and use ggplot to plot maps
library(maps)
library(ggplot2)
library(ggthemes)
us_states <- map_data("state")
head(us_states,10)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
## 7 -87.59379 30.30947 1 7 alabama <NA>
## 8 -87.59379 30.28655 1 8 alabama <NA>
## 9 -87.67400 30.27509 1 9 alabama <NA>
## 10 -87.81152 30.25790 1 10 alabama <NA>
beer_brew_df<-merge(x=beer_df,y=brewery_df,by.x="Brewery_id",by.y="Brew_ID",all.x = TRUE)
beer_brew_df_map<-merge(x=count_by_state,y=us_states,by.x="Var1",by.y="region",all.x = TRUE)
p <- ggplot(data = beer_brew_df_map,
aes(x = long, y = lat,
group = group, fill = Freq))
p + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
labs(title = "Brewery Distribution") +
theme_map()
#2. Merge beer data with the breweries data. Print the first 6 observations and the last six observations to check the merged file.
beer_brew_df<-merge(x=beer_df,y=brewery_df,by.x="Brewery_id",by.y="Brew_ID",all.x = TRUE)
head(beer_brew_df,6)
## Brewery_id Name.x Beer_ID ABV IBU
## 1 1 Get Together 2692 0.045 50
## 2 1 Maggie's Leap 2691 0.049 26
## 3 1 Wall's End 2690 0.048 19
## 4 1 Pumpion 2689 0.060 38
## 5 1 Stronghold 2688 0.060 25
## 6 1 Parapet ESB 2687 0.056 47
## Style Ounces Name.y
## 1 American IPA 16 NorthGate Brewing
## 2 Milk / Sweet Stout 16 NorthGate Brewing
## 3 English Brown Ale 16 NorthGate Brewing
## 4 Pumpkin Ale 16 NorthGate Brewing
## 5 American Porter 16 NorthGate Brewing
## 6 Extra Special / Strong Bitter (ESB) 16 NorthGate Brewing
## City State State_full
## 1 Minneapolis MN minnesota
## 2 Minneapolis MN minnesota
## 3 Minneapolis MN minnesota
## 4 Minneapolis MN minnesota
## 5 Minneapolis MN minnesota
## 6 Minneapolis MN minnesota
tail(beer_brew_df,6)
## Brewery_id Name.x Beer_ID ABV IBU
## 2405 556 Pilsner Ukiah 98 0.055 NA
## 2406 557 Heinnieweisse Weissebier 52 0.049 NA
## 2407 557 Snapperhead IPA 51 0.068 NA
## 2408 557 Moo Thunder Stout 50 0.049 NA
## 2409 557 Porkslap Pale Ale 49 0.043 NA
## 2410 558 Urban Wilderness Pale Ale 30 0.049 NA
## Style Ounces Name.y
## 2405 German Pilsener 12 Ukiah Brewing Company
## 2406 Hefeweizen 12 Butternuts Beer and Ale
## 2407 American IPA 12 Butternuts Beer and Ale
## 2408 Milk / Sweet Stout 12 Butternuts Beer and Ale
## 2409 American Pale Ale (APA) 12 Butternuts Beer and Ale
## 2410 English Pale Ale 12 Sleeping Lady Brewing Company
## City State State_full
## 2405 Ukiah CA california
## 2406 Garrattsville NY new york
## 2407 Garrattsville NY new york
## 2408 Garrattsville NY new york
## 2409 Garrattsville NY new york
## 2410 Anchorage AK alaska
#3. Report the number of NA’s in each column.
summary(beer_df$ABV) #62 na
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00100 0.05000 0.05600 0.05977 0.06700 0.12800 62
summary(beer_df$IBU) #1005 na
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.00 21.00 35.00 42.71 64.00 138.00 1005
sapply(beer_brew_df, function(x) sum(is.na(x))) #another check
## Brewery_id Name.x Beer_ID ABV IBU Style
## 0 0 0 62 1005 0
## Ounces Name.y City State State_full
## 0 0 0 0 0
#optional remove NA's
cmerge <- beer_brew_df[complete.cases(beer_brew_df),]
#sapply(cmerge, function(x) sum(is.na(x))) #to check cmerge for NA's
#filter by abv/ibu to get boxes for slide 8
#less than 100 IBU, 10% ABV
bbox<- subset(cmerge, cmerge$ABV <0.1 & cmerge$IBU < 100)
#1353/1405, 96.2%
#less than 50 IBU, 7.5% ABV
rbox <- subset(cmerge, cmerge$ABV < 0.06 &cmerge$ABV > 0.03 & cmerge$IBU <50)
#740/1405, 52.6%
There are 62 beers with no reported ABV, and 1005 beers with no reported IBU
#4. Compute the median alcohol content and international bitterness unit for each state. Plot a bar chart to compare.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
median_IBU_by_state <- beer_brew_df %>%
group_by(State_full)%>%
summarise(Median=median(IBU,na.rm=TRUE))
median_IBU_by_state[is.na(median_IBU_by_state$Median),]
## # A tibble: 1 x 2
## State_full Median
## <fct> <dbl>
## 1 south dakota NA
# *** state "SD" no Median, All IBU are NA in SD
distinct(beer_brew_df,State)
## State
## 1 MN
## 2 KY
## 3 MA
## 4 CA
## 5 SC
## 6 CO
## 7 MI
## 8 IN
## 9 TX
## 10 AZ
## 11 MO
## 12 WI
## 13 IL
## 14 VT
## 15 ME
## 16 PA
## 17 KS
## 18 NY
## 19 NH
## 20 GA
## 21 VA
## 22 FL
## 23 MD
## 24 NC
## 25 WY
## 26 OR
## 27 RI
## 28 CT
## 29 OH
## 30 AK
## 31 MS
## 32 AR
## 33 LA
## 34 WV
## 35 UT
## 36 ID
## 37 WA
## 38 OK
## 39 NE
## 40 HI
## 41 IA
## 42 SD
## 43 NJ
## 44 MT
## 45 DC
## 46 NV
## 47 TN
## 48 NM
## 49 AL
## 50 DE
## 51 ND
beer_brew_df[beer_brew_df$State=='SD',]
## Brewery_id Name.x Beer_ID ABV IBU
## 1237 213 Red Water Irish Style Red 2145 0.065 NA
## 1238 213 Mjöllnir 1804 0.066 NA
## 1239 213 Bear Butte Nut Brown Ale 1602 0.055 NA
## 1240 213 Easy Livin' Summer Ale 1301 0.045 NA
## 1241 213 Canyon Cream Ale 542 0.055 NA
## 1242 213 Pile O'Dirt Porter 272 0.069 NA
## 1243 213 11th Hour IPA 271 0.060 NA
## Style Ounces Name.y City
## 1237 American Amber / Red Ale 12 Crow Peak Brewing Company Spearfish
## 1238 Herbed / Spiced Beer 12 Crow Peak Brewing Company Spearfish
## 1239 American Brown Ale 12 Crow Peak Brewing Company Spearfish
## 1240 American Blonde Ale 12 Crow Peak Brewing Company Spearfish
## 1241 Cream Ale 12 Crow Peak Brewing Company Spearfish
## 1242 American Porter 12 Crow Peak Brewing Company Spearfish
## 1243 American IPA 12 Crow Peak Brewing Company Spearfish
## State State_full
## 1237 SD south dakota
## 1238 SD south dakota
## 1239 SD south dakota
## 1240 SD south dakota
## 1241 SD south dakota
## 1242 SD south dakota
## 1243 SD south dakota
ggplot(median_IBU_by_state, aes(x=reorder(State_full,-Median),y=Median,fill=State_full)) +
geom_bar(stat="identity") +
labs(title="IBU Median by State",subtitle="",x="State",y="Median",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
## Warning: Removed 1 rows containing missing values (position_stack).
National Average ABV is 5.9% National Average IBU is 42.7
#5. Which state has the maximum alcoholic (ABV) beer? Which state has the most bitter (IBU) beer?
ABV_by_state <- beer_brew_df %>%
group_by(State_full)%>%
summarise(Mean=mean(ABV,na.rm=TRUE),Median=median(ABV,na.rm=TRUE))
ABV_by_state
## # A tibble: 51 x 3
## State_full Mean Median
## <fct> <dbl> <dbl>
## 1 alabama 0.062 0.06
## 2 alaska 0.0556 0.056
## 3 arizona 0.0602 0.055
## 4 arkansas 0.052 0.052
## 5 california 0.0611 0.058
## 6 colorado 0.0634 0.0605
## 7 connecticut 0.0611 0.06
## 8 delaware 0.055 0.055
## 9 district of columbia 0.0656 0.0625
## 10 florida 0.0599 0.057
## # … with 41 more rows
IBU_by_state <- beer_brew_df %>%
group_by(State_full)%>%
summarise(Mean=mean(IBU,na.rm=TRUE),Median=median(IBU,na.rm=TRUE))
IBU_by_state
## # A tibble: 51 x 3
## State_full Mean Median
## <fct> <dbl> <dbl>
## 1 alabama 51.2 43
## 2 alaska 40.9 46
## 3 arizona 35.2 20.5
## 4 arkansas 39 39
## 5 california 46.3 42
## 6 colorado 47.4 40
## 7 connecticut 40.8 29
## 8 delaware 52 52
## 9 district of columbia 55.2 47.5
## 10 florida 46.8 55
## # … with 41 more rows
#Top 10 ABV state (Mean)
top_n(ABV_by_state, n=10, Mean) %>%
ggplot(., aes(x=reorder(State_full,-Mean),y=Mean,fill=State_full)) +
geom_bar(stat="identity") +
labs(title="Top 10 ABV Average by State",subtitle="",x="State",y="Average",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
#Top 10 IBU state (Mean)
top_n(IBU_by_state, n=10, Mean) %>%
ggplot(., aes(x=reorder(State_full,-Mean),y=Mean,fill=State_full)) +
geom_bar(stat="identity") +
labs(title="Top 10 IBU Average by State",subtitle="",x="State",y="Average",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
#--------
summary(beer_brew_df)
## Brewery_id Name.x Beer_ID
## Min. : 1.0 Nonstop Hef Hop : 12 Min. : 1.0
## 1st Qu.: 94.0 Dale's Pale Ale : 6 1st Qu.: 808.2
## Median :206.0 Oktoberfest : 6 Median :1453.5
## Mean :232.7 Longboard Island Lager: 4 Mean :1431.1
## 3rd Qu.:367.0 1327 Pod's ESB : 3 3rd Qu.:2075.8
## Max. :558.0 Boston Lager : 3 Max. :2692.0
## (Other) :2376
## ABV IBU Style
## Min. :0.00100 Min. : 4.00 American IPA : 424
## 1st Qu.:0.05000 1st Qu.: 21.00 American Pale Ale (APA) : 245
## Median :0.05600 Median : 35.00 American Amber / Red Ale : 133
## Mean :0.05977 Mean : 42.71 American Blonde Ale : 108
## 3rd Qu.:0.06700 3rd Qu.: 64.00 American Double / Imperial IPA: 105
## Max. :0.12800 Max. :138.00 American Pale Wheat Ale : 97
## NA's :62 NA's :1005 (Other) :1298
## Ounces Name.y City
## Min. : 8.40 Brewery Vivant : 62 Grand Rapids: 66
## 1st Qu.:12.00 Oskar Blues Brewery : 46 Portland : 64
## Median :12.00 Sun King Brewing Company : 38 Chicago : 55
## Mean :13.59 Cigar City Brewing Company: 25 Indianapolis: 43
## 3rd Qu.:16.00 Sixpoint Craft Ales : 24 San Diego : 42
## Max. :32.00 Hopworks Urban Brewery : 23 Boulder : 41
## (Other) :2192 (Other) :2099
## State State_full
## Length:2410 colorado : 265
## Class :character california: 183
## Mode :character michigan : 162
## indiana : 139
## texas : 130
## oregon : 125
## (Other) :1406
median_ABV_by_state <- beer_brew_df %>%
group_by(State_full)%>%
summarise(Median=median(ABV,na.rm=TRUE))
#Top 10 ABV state
top_n(median_ABV_by_state, n=10, Median) %>%
ggplot(., aes(x=reorder(State_full,-Median),y=Median,fill=State_full)) +
geom_bar(stat="identity") +
labs(title="Top 10 ABV Median by State",subtitle="",x="State",y="Median",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none") #+ geom_smooth(method = "lm")
#Top 10 IBU state
top_n(median_IBU_by_state, n=10, Median) %>%
ggplot(., aes(x=reorder(State_full,-Median),y=Median,fill=State_full)) +
geom_bar(stat="identity") +
labs(title="Top 10 IBU Median by State",subtitle="",x="State",y="Median",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
#6. Summary statistics for the ABV variable.
summary(beer_brew_df$ABV)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00100 0.05000 0.05600 0.05977 0.06700 0.12800 62
#7. Is there an apparent relationship between the bitterness of the beer and its alcoholic content? Draw a scatter plot.
ggplot(beer_brew_df, aes(x=ABV, y=IBU)) +
geom_point(shape=18, color="blue")+
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue")
## Warning: Removed 1005 rows containing non-finite values (stat_smooth).
## Warning: Removed 1005 rows containing missing values (geom_point).
#Extra: ABV, IBU by style
beer_df$Style_cat='Unknown'
#clean special characters
beer_df$Style <- gsub('Kölsch', 'Kolsch', beer_df$Style)
beer_df$Style <- gsub('Märzen', 'Marzen', beer_df$Style)
beer_df$Style <- gsub('Bière', 'Biere', beer_df$Style)
#IPA
beer_df$Style_cat <- ifelse(grepl("India Pale Ale", ignore.case=TRUE, beer_df$Style), "IPA", beer_df$Style_cat)
beer_df$Style_cat <- ifelse(grepl("IPA", ignore.case=TRUE, beer_df$Style), "IPA", beer_df$Style_cat)
#Porters
beer_df$Style_cat <- ifelse(grepl("porter", ignore.case=TRUE, beer_df$Style), "Porters", beer_df$Style_cat)
#Stouts
beer_df$Style_cat <- ifelse(grepl("stout", ignore.case=TRUE, beer_df$Style), "Stouts", beer_df$Style_cat)
#Strong Ales
strong_ales_key <-c('red ale','wine ale', 'old ale', 'wine', 'strong ale','quadrupel','tripel','Scotch Ale','Wee Heavy')
beer_df$Style_cat <- ifelse(grepl(paste(strong_ales_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Strong Ales", beer_df$Style_cat)
#Wheat Beers
wheat_beer_key <- c('wheat', 'Witbier', 'Weissbier','Dunkelweizen','Hefeweizen')
beer_df$Style_cat <- ifelse(grepl(paste(wheat_beer_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Wheat Beers", beer_df$Style_cat)
#Wild or Sour Beers
wild_beer_key <-c('Gose', 'wild ale','oud bruin','Flanders red ale')
beer_df$Style_cat <- ifelse(grepl(paste(wild_beer_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Wild or Sour Beers", beer_df$Style_cat)
#Hybrid Beers
hybrid_beer_key <- c('california common', 'Garde', 'cream ale','kolsch','Altbier','braggot')
beer_df$Style_cat <- ifelse(grepl(paste(hybrid_beer_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Hybrid Beers", beer_df$Style_cat)
#Dark Lagers
dark_key <-c('schwarzbier','Marzen','Oktoberfest')
beer_df$Style_cat <- ifelse(grepl(paste(dark_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Dark Lagers", beer_df$Style_cat)
#Bocks
bock_key<-c('Weizen','bock')
beer_df$Style_cat <- ifelse(grepl(paste(bock_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Bocks", beer_df$Style_cat)
#Dark Ales
dark_ale_key<-c('black ale', 'dark ale', 'dubbel', 'roggenbier', 'scottish ale', 'winter warmer')
beer_df$Style_cat <- ifelse(grepl(paste(dark_ale_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Dark Ales", beer_df$Style_cat)
#Brown Ales
brown_key<-c('Brown Ale','Oud Bruin','Mild')
beer_df$Style_cat <- ifelse(grepl(paste(brown_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Brown Ales", beer_df$Style_cat)
#Specialty Beers
spec_key<-c('Shandy','cider','mead','radler','chile beer','low alcohol beer','black ale','Pumpkin Ale', 'Rye Beer', 'Smoked Beer', 'rauchbier', 'herbed','spiced','Fruit','Vegetable')
beer_df$Style_cat <- ifelse(grepl(paste(spec_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Specialty Beers", beer_df$Style_cat)
#Pale Ales
pal_key<-c('blonde ale', 'bitter', 'ESB', 'pale ale', 'Abbey Single Ale','saison','grisette')
beer_df$Style_cat <- ifelse(grepl(paste(pal_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Pale Ales", beer_df$Style_cat)
#Pilsener and Pale Lager
pil_key<-c('pilsener', 'pilsner','Helles','Keller Bier','Zwickel Bier','Malt Liquor','american lager')
beer_df$Style_cat <- ifelse(grepl(paste(pil_key,collapse="|"), ignore.case=TRUE, beer_df$Style), 'Pilsener and Pale Lager', beer_df$Style_cat)
#Lager
lager_key<-c(' lager')
beer_df$Style_cat <- ifelse(grepl(paste(lager_key,collapse="|"), ignore.case=TRUE, beer_df$Style), "Lager", beer_df$Style_cat)
beer_brew_df<-merge(x=beer_df,y=brewery_df,by.x="Brewery_id",by.y="Brew_ID",all.x = TRUE)
ABV_by_style <- beer_brew_df %>%
group_by(Style_cat)%>%
summarise(Mean=mean(ABV,na.rm=TRUE),Median=median(ABV,na.rm=TRUE))
ABV_by_style
## # A tibble: 16 x 3
## Style_cat Mean Median
## <chr> <dbl> <dbl>
## 1 Bocks 0.0558 0.052
## 2 Brown Ales 0.0562 0.055
## 3 Dark Ales 0.0709 0.0695
## 4 Dark Lagers 0.0568 0.055
## 5 Hybrid Beers 0.0547 0.054
## 6 IPA 0.0689 0.068
## 7 Lager 0.0501 0.05
## 8 Pale Ales 0.0551 0.054
## 9 Pilsener and Pale Lager 0.0518 0.05
## 10 Porters 0.0618 0.06
## 11 Specialty Beers 0.0597 0.059
## 12 Stouts 0.0687 0.0635
## 13 Strong Ales 0.0647 0.058
## 14 Unknown 0.0493 0.05
## 15 Wheat Beers 0.0499 0.05
## 16 Wild or Sour Beers 0.0563 0.05
IBU_by_style <- beer_brew_df %>%
group_by(Style_cat)%>%
summarise(Mean=mean(IBU,na.rm=TRUE),Median=median(IBU,na.rm=TRUE))
IBU_by_style
## # A tibble: 16 x 3
## Style_cat Mean Median
## <chr> <dbl> <dbl>
## 1 Bocks 18.6 16
## 2 Brown Ales 27.9 25
## 3 Dark Ales 27.5 22.5
## 4 Dark Lagers 24.2 24
## 5 Hybrid Beers 25.6 23
## 6 IPA 72.3 70
## 7 Lager 22.9 19
## 8 Pale Ales 37.2 36
## 9 Pilsener and Pale Lager 31.3 30
## 10 Porters 33.5 32
## 11 Specialty Beers 34.5 20
## 12 Stouts 45.0 38
## 13 Strong Ales 39.0 30.5
## 14 Unknown 21.7 22
## 15 Wheat Beers 19.3 18
## 16 Wild or Sour Beers 13.3 10
#Top 10 ABV Style (Mean)
top_n(ABV_by_style, n=99, Mean) %>%
ggplot(., aes(x=reorder(Style_cat,-Mean),y=Mean,fill=Style_cat)) +
geom_bar(stat="identity") +
labs(title="Top 10 ABV Average by Style",subtitle="",x="Style",y="Average",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
#Top 10 IBU Style (Mean)
top_n(IBU_by_style, n=99, Mean) %>%
ggplot(., aes(x=reorder(Style_cat,-Mean),y=Mean,fill=Style_cat)) +
geom_bar(stat="identity") +
labs(title="Top 10 IBU Average by Style",subtitle="",x="Style",y="Average",caption="") +
theme(plot.title=element_text(hjust=0.5),axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")
##extra2 style pie chart
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
count_by_style_cat <- data.frame(table(beer_brew_df$Style_cat))
names(count_by_style_cat)<-c("Style_cat","Freq")
percent_by_style_cat <- data.frame(count_by_style_cat$Style_cat, count_by_style_cat$Freq/sum(count_by_style_cat$Freq)*100)
names(percent_by_style_cat) <- c("Style", "percent")
p <- plot_ly(percent_by_style_cat, labels = ~Style, values = ~percent, type = 'pie',textposition = 'outside',textinfo = 'label+percent', height=600) %>%
layout(title = 'Style Distribution',
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
showlegend = FALSE, margin= list(l=50,r=50,b=100,t=100,pad=4))
p